{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "39616240-0e59-45d8-88c1-35b87e340324",
   "metadata": {},
   "source": [
    "# Alpaca LoRA Training"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bedc0878-0e40-4632-8162-7d2227809657",
   "metadata": {},
   "source": [
    "## Setup LFS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "326a6f80-0723-40ee-812f-8164e6b038a3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-04-02T12:27:38.028091Z",
     "iopub.status.busy": "2023-04-02T12:27:38.027420Z",
     "iopub.status.idle": "2023-04-02T12:27:46.145638Z",
     "shell.execute_reply": "2023-04-02T12:27:46.143111Z",
     "shell.execute_reply.started": "2023-04-02T12:27:38.028032Z"
    }
   },
   "outputs": [],
   "source": [
    "# Refresh the package index, then install Git LFS.\n",
    "# apt-get is the script-stable front end, and -y pre-answers the confirmation\n",
    "# prompt, which cannot be answered interactively from a notebook cell.\n",
    "!apt-get update\n",
    "!apt-get install -y git-lfs\n",
    "# Register the Git LFS filters in the git configuration so that later clones\n",
    "# download the large model files instead of leaving pointer stubs.\n",
    "!git lfs install"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5cdbf030",
   "metadata": {},
   "source": [
    "## Clone to Local"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "434bee3a-5913-48f4-86e3-16a3e42a2a16",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-04-02T12:27:51.878378Z",
     "iopub.status.busy": "2023-04-02T12:27:51.877672Z",
     "iopub.status.idle": "2023-04-02T12:31:47.892248Z",
     "shell.execute_reply": "2023-04-02T12:31:47.889487Z",
     "shell.execute_reply.started": "2023-04-02T12:27:51.878318Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cloning into 'llama-7b-hf'...\n",
      "remote: Enumerating objects: 91, done.\u001B[K\n",
      "remote: Counting objects: 100% (91/91), done.\u001B[K\n",
      "remote: Compressing objects: 100% (89/89), done.\u001B[K\n",
      "remote: Total 91 (delta 6), reused 0 (delta 0), pack-reused 0\u001B[K\n",
      "Unpacking objects: 100% (91/91), done.\n",
      "Filtering content: 100% (34/34), 12.55 GiB | 55.03 MiB/s, done.\n"
     ]
    }
   ],
   "source": [
    "# Download the base LLaMA-7B weights (large LFS checkout).\n",
    "# The directory test keeps the cell re-runnable: git clone aborts when the target\n",
    "# already exists. Delete llama-7b-hf/ first to force a fresh download.\n",
    "![ -d llama-7b-hf ] || git clone https://huggingface.co/decapoda-research/llama-7b-hf"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d278f1e6",
   "metadata": {},
   "source": [
    "# Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "305b18a5",
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2023-04-02T12:32:32.832436Z",
     "iopub.status.busy": "2023-04-02T12:32:32.831674Z",
     "iopub.status.idle": "2023-04-02T12:33:20.272901Z",
     "shell.execute_reply": "2023-04-02T12:33:20.270820Z",
     "shell.execute_reply.started": "2023-04-02T12:32:32.832338Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# Fetch the alpaca-lora training code; skipped when the checkout already exists\n",
    "# so the cell can be re-run without git clone failing on the existing directory.\n",
    "![ -d alpaca-lora ] || git clone https://github.com/tloen/alpaca-lora\n",
    "\n",
    "# Install the trainer's Python dependencies from its requirements file.\n",
    "!cd alpaca-lora && pip install -r requirements.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e5cce5ab-77d9-40b0-9fa4-61f3de1381f2",
   "metadata": {},
   "source": [
    "## Training in Local"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1b7a120f-acef-4e77-822c-d298f4920304",
   "metadata": {},
   "source": [
    "### Normal Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "712b1e50-fbb6-4ff1-bf09-62466be77e60",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-04-02T12:33:32.259709Z",
     "iopub.status.busy": "2023-04-02T12:33:32.259013Z",
     "iopub.status.idle": "2023-04-02T12:33:51.512707Z",
     "shell.execute_reply": "2023-04-02T12:33:51.510063Z",
     "shell.execute_reply.started": "2023-04-02T12:33:32.259649Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Fine-tune the local base model on the yahma/alpaca-cleaned dataset, passing no\n",
    "# hyperparameter overrides. --output_dir receives the training results, so it gets\n",
    "# a directory-style name (the same one the custom-parameter run uses) rather than\n",
    "# a name ending in '.jsonl', which reads like a dataset file.\n",
    "!cd alpaca-lora && python finetune.py \\\n",
    "    --base_model '../llama-7b-hf' \\\n",
    "    --data_path 'yahma/alpaca-cleaned' \\\n",
    "    --output_dir './lora-alpaca'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e1b42535-5b7c-44d4-bc8b-46ce02b4c7b9",
   "metadata": {},
   "source": [
    "### Training with Custom Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "857ee84b",
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# Fine-tune with every hyperparameter spelled out explicitly.\n",
    "# The command runs from inside alpaca-lora/, so 'test_lora.jsonl' and './lora-alpaca'\n",
    "# resolve relative to that directory, while '../llama-7b-hf' points at the weights\n",
    "# cloned next to it.\n",
    "!cd alpaca-lora && python finetune.py \\\n",
    "    --base_model '../llama-7b-hf' \\\n",
    "    --data_path 'test_lora.jsonl' \\\n",
    "    --output_dir './lora-alpaca' \\\n",
    "    --batch_size 128 \\\n",
    "    --micro_batch_size 4 \\\n",
    "    --num_epochs 3 \\\n",
    "    --learning_rate 1e-4 \\\n",
    "    --cutoff_len 512 \\\n",
    "    --val_set_size 2000 \\\n",
    "    --lora_r 8 \\\n",
    "    --lora_alpha 16 \\\n",
    "    --lora_dropout 0.05 \\\n",
    "    --lora_target_modules '[q_proj,v_proj]' \\\n",
    "    --train_on_inputs \\\n",
    "    --group_by_length"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2440efa9-eaea-4a2a-93bb-3ed71e7f8749",
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "source": [
    "# Run with Server"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5db00cfd",
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# Fetch the demo web app into alpaca-lora-web/; skipped when that directory already\n",
    "# exists so re-running the notebook does not fail on the existing checkout.\n",
    "![ -d alpaca-lora-web ] || git clone https://huggingface.co/spaces/tloen/alpaca-lora alpaca-lora-web"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b124ab96",
   "metadata": {},
   "source": [
    "## Configure Package Index Mirror (Optional)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b7f26d8",
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# Optional: make pip use the Tsinghua (TUNA) PyPI mirror as its index.\n",
    "# The setting is written to pip's configuration, so it stays in effect after this\n",
    "# notebook; skip this cell if the default index is reachable.\n",
    "!pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20152e13",
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# Install the web demo's Python dependencies from its own requirements file.\n",
    "!cd alpaca-lora-web && pip install -r requirements.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "581e4fb5",
   "metadata": {},
   "source": [
    "## Run Server"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8cc8772b",
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# Before launching, edit alpaca-lora-web/app.py so it loads the local model\n",
    "# instead of the Hugging Face one, for example:\n",
    "#   tokenizer = LlamaTokenizer.from_pretrained(\"../llama-7b-hf\")\n",
    "#   BASE_MODEL = \"../llama-7b-hf\"\n",
    "!cd alpaca-lora-web && python app.py"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
